/*
NOTES TO DATA ANALYSIS:
*/

clear all
set more off
capture log close

********************************************************************************

log using "$logfiles/analysis_figures_`c(current_date)'.log", replace

use "$covidclean/smscovid_clean.dta", clear

***Keep relevant observations: consent taken and above 18 years of age, non-missing treatment arms
	keep if consent==1 & age>=18
	keep if ~missing(treatment_arm)
	count //3964

***HOUSEKEEPING - BEHAVIOR TREATMENTS, OUTCOMES
		
	***Create dummies with treatment arms interacted with behavior
		foreach v in $treat_pool $treat_frames $treat_timings $treat_arms {
			gen `v'_sd = `v'*behavior_sd
			gen `v'_hw = `v'*behavior_hw
		}
		
	***Outcomes
		global keysdhw know_sd act_sd know_hw act_hw
		global risks risk_sick_h risk_die_h

********************************************************************************
***FIGURE: ITT RESULTS FOR MAIN OUTCOMES FOR POOLED TREATMENT
********************************************************************************
				
		qui foreach v of varlist $keysdhw $risks {
			
				*Regression
				noi reghdfe `v' $treat_pool_sd $treat_pool_hw if $sample, a($studycontrols $covariates) vce(robust)
				
				* Save b and lower/upper 95% CI in a matrix
				mat rtable = r(table)
				
				* Need to add values for constant to the other coefficients, e.g.: b_cons + treatment_pooled_sd
				mat cons =        ( rtable["b", "_cons"] \ rtable["ll", "_cons"] \ rtable["ul", "_cons"] )
				mat sd   = cons + ( rtable["b", "$treat_pool_sd"] \ rtable["ll", "$treat_pool_sd"] \ rtable["ul", "$treat_pool_sd"] )
				mat hw   = cons + ( rtable["b", "$treat_pool_hw"] \ rtable["ll", "$treat_pool_hw"] \ rtable["ul", "$treat_pool_hw"] )
				
				
				* Collect for constant, SD and HW
				mat tmp = ( cons, sd, hw )
					
				* Assign the correct names that we'll also use as labels for the graph
				local cnames: colnames tmp
				local cnames: subinstr local cnames "treatment_pooled_sd" "Distancing"
				local cnames: subinstr local cnames "treatment_pooled_hw" "Handwashing"
				local cnames: subinstr local cnames "_cons"               "Control"
				
				mat colnames tmp = `cnames'
				mat list tmp	
				
				* Use outcome variable as column equation so know which is which
				mat coleq    tmp = `v'

				mat out = nullmat(out) \ tmp'
				mat drop tmp
				
		}
				
		preserve 
	
			clear
			svmat out, names(col)
			local rownames : rownames out
			local roweqs   : roweq   out
			local c : word count `rownames'
			
			* Add matrix row names to dataset
			gen coeff   = ""
			gen outcome = ""
			forvalues i = 1/`c' {
				replace coeff = "`:word `i' of `rownames''" in `i'
				replace outcome = "`:word `i' of `roweqs''" in `i'
			}
			
			* Adjust so that labels are in 2 rows in the graph
			replace coeff = `" "Control"  "'  if coeff=="Control"
			replace coeff = `" "Distancing" "Treatment" "'  if coeff=="Distancing"
			replace coeff = `" "Handwashing" "Treatment" "'  if coeff=="Handwashing"
			
			encode outcome, gen( y )
			encode coeff, gen( arm )
			
			list
			
			* Now create 1 graph for each of the 4 outcomes
			foreach v in $keysdhw  $risks {
					
					*Load output matrix with exact p-values
					matload `v'_p, path($tables) saving
					
					* Can just set the display format instead of rounding (which sometimes gives funky numbers)
					local psd: di %4.3fc `v'_p[1,3]
					local phw: di %4.3fc `v'_p[2,3]
						
					* Title for this graph
					if "`v'"=="know_sd"  local graphtitle = "Distancing - Knowledge"  
					if "`v'"=="act_sd"   local graphtitle = "Distancing - Action"     
					if "`v'"=="know_hw"  local graphtitle = "Handwashing - Knowledge" 
					if "`v'"=="act_hw"   local graphtitle = "Handwashing - Action"
					if "`v'"=="risk_sick_h" local graphtitle = "Risk of getting sick from COVID-19"
					if "`v'"=="risk_die_h"  local graphtitle = "Risk of dying from COVID-19"
				
					* For know_sd graph (which goes into top-left of the combined graph) also label the CI and explain randomization p value
					if "`v'"=="know_sd" {
						qui sum ul if outcome=="`v'" & arm==2
						local cilabel = `" text( `r(mean)' 2 "95% CI", place(e) size(small) margin(l+2) ) "'
						local psd = `" "Randomization" "p-value" "[`psd']" "'
					}
					
					* For all others merely add the [ ] around the p-value
					else {
						local cilabel = " "
						local psd = `" "[`psd']" "'
					}
					
					local phw = `" "[`phw']" "'
						 
					* Graph where the control bar is green and the others are blue-ish
					noi twoway bar  b     arm if arm==1 & outcome=="`v'", bcolor(plg2)  lwidth(none) barwidth(.7) ||   /*
						*/ bar  b     arm if arm!=1 & outcome=="`v'", bcolor(plb1)  lwidth(none) barwidth(.7) ||   /*
						*/ rcap ul ll arm if arm!=1 & outcome=="`v'", lcolor(black) lwidth(thick)            || , /*
						*/ `cilabel' /*
						*/ text( 0.02  2 `psd', place(n) size(small) ) /*
						*/ text( 0.02  3 `phw', place(n) size(small) ) /*
						*/ ylabel( 0(.2)1, labsize(small) nogrid ) ytick( 0(0.1)1, nogrid ) ytitle("", size(small) ) xtitle("") xlabel( 0.6 " " 1 2 3 3.4 " ", valuelabel labsize(small) nogrid ) /*
						*/ title( "`graphtitle'", margin(b=-5) )/*
						*/ legend(off) scheme(plotplain ) plotregion(style(none) lcolor(none)) graphregion(fcol(white) lcol(white))  name( `v'_p, replace ) plotregion( m(b=0) )
						 
				}
				
				
		restore 
	
		*FIGURE 1:
		graph combine know_sd_p act_sd_p know_hw_p act_hw_p, plotregion(style(none) lcolor(none)) graphregion(fcol(white) lcol(white)) 
 		graph export "$figures/pooled_itt.eps", as(eps) replace
		
		graph combine risk_sick_h_p risk_die_h_p, plotregion(style(none) lcolor(none)) graphregion(fcol(white) lcol(white))  
		graph export "$figures/pooled_risk.eps", as(eps) replace

log close
exit, clear			